Bar length corresponds to Duration, and reflects the absolute durations of words/pauses. Bar length is normalized w.r.t. the longest reported duration in the data for an element, which is ~ 2,450ms long:
Bar color corresponds to Mean F0 and reflects the relative mean F0 of words compared to other in the same passage. The following diverging color palette is used to mark relative mean F0, where blue hues are below average and red hues are above average.
library(dplyr)
library(readr)
library(scales)
library(colorspace)
library(stringr)
library(glue)
# Read in and process data
data_raw <- read_csv("C:/Users/jchoe/Downloads/may_fixed_encoding.txt")
df <- data_raw %>%
mutate(
Duration = abs(Duration),
across(contains("F0"), ~ as.numeric(replace(.x, which(.x == "--undefined--"), NA))),
across(c(Passage, Sentence, MeasurementOrder), as.integer),
across(contains("F0"), ~ replace(.x, which(data_raw$MeasurementLabel == "sp"), NA))
) %>%
select(-Speaker)
# Helper function
html_word_bar <- function(word, fill, color, padding, size, margin, tooltip, border_style) {
stringr::str_squish(glue::glue("
<div class='elem-container'>
<div class='tooltip'>
<span style='margin: {margin}px; background-color: {fill};\\
border: 1px {border_style} black; padding-left: {padding}px;\\
padding-right: {padding}px; font-size: {size}px'></span>
<span class='tooltiptext'>{tooltip}</span>
</div>
<br>
<span style='margin: {margin}px; color: {color}'>{word}</span>
</div>
"))
}
# Add styles
df_combined <- df %>%
group_by(Passage, Sentence) %>%
filter(!(MeasurementOrder %in% range(MeasurementOrder) & MeasurementLabel == "sp")) %>%
ungroup() %>%
mutate(
dur_scaled = (Duration - min(Duration))/diff(range(Duration)),
padding = round(dur_scaled * 150 / 2, 1),
word = replace(MeasurementLabel, MeasurementLabel == "sp", "◌")
) %>%
group_by(Passage) %>% # Normalize F0 by passage
mutate(
fill = scales::col_numeric(
palette = colorspace::diverge_hcl(7, palette = 'Blue-Red 2'),
na.color = "white",
domain = MeanF0
)(MeanF0),
color = "black"
) %>%
ungroup() %>%
mutate(
border_style = ifelse(MeasurementLabel == "sp", "dotted", "solid"),
tooltip = glue("{round(Duration * 1000)}ms{ifelse(!is.na(MeanF0), paste0(', ', round(MeanF0), 'Hz'), '')}"),
html = html_word_bar(word, fill, color, padding, size = 3, margin = 5, tooltip, border_style)
)
# Add tooltip and collapse
df_tbl_combined <- df_combined %>%
group_by(Passage, Sentence) %>%
summarize(
Label = stringr::str_squish(paste(html, collapse = "")),
.groups = 'drop'
)
# Render Table (DT)
library(DT)
df_tbl_combined %>%
add_count(Passage) %>%
mutate(Passage = glue("Passage {Passage} ({n})")) %>%
select(-n) %>%
slice(1:1000) %>%
datatable(
rownames = FALSE,
colnames = c("Utterance" = "Label"),
extensions = 'RowGroup',
escape = 1:2,
options = list(
rowGroup = list(dataSrc = 0),
columnDefs = list(list(visible = FALSE, targets = 0))
)
)
# Render Table (static reactable)
# library(reactable)
# df_tbl_combined %>%
# reactable(
# groupBy = "Passage",
# searchable = TRUE,
# pagination = TRUE,
# columns = list(
# Passage = colDef(width = 100),
# Sentence = colDef(width = 100),
# Label = colDef(
# html = TRUE,
# style = list(overflow = "visible"),
# resizable = TRUE
# )
# ),
# style = list(
# margin = "100px"
# )
# )